EDA - Clusters - 3BIO

In [160]:
# -*- coding: utf-8 -*-
In [161]:
%%HTML
<script src="require.js"></script>
In [162]:
# Importar librerías
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from matplotlib.pyplot import figure
import plotly.express as px
import seaborn as sns
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default='notebook'

Data

In [163]:
from google.colab import drive
drive.mount('/content/drive/')
Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
In [164]:
%cd /content/drive/My Drive/3BIO-Cluster-DataAnalysis
/content/drive/My Drive/3BIO-Cluster-DataAnalysis
In [165]:
%ls
articulos.csv        Cluster.infoGrupos.csv
Cluster.cluster.csv  Cluster.infoInstituciones.csv
In [166]:
df = pd.read_csv("Cluster.cluster.csv")
df
Out[166]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
0 0 4.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 ... 1.0 4.0 0.0 0.0 0.0 0.0 0.0 0.0 A.I.D Biohacking Colombia Avalado 1
1 1 10.0 1.0 3.0 1.0 2.0 4.0 0.0 1.0 0.0 ... 7.0 1.0 0.0 3.0 0.0 0.0 1.0 0.0 Aalborg University No Avalado 1
2 2 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 1.0 2.0 0.0 0.0 0.0 0.0 Abogato Jurídico S.A.S. Avalado 1
3 3 9.0 0.0 0.0 2.0 7.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 Abya-Ayala No Avalado 1
4 4 5.0 0.0 0.0 0.0 0.0 12.0 0.0 0.0 1.0 ... 1.0 10.0 1.0 16.0 0.0 0.0 1.0 0.0 Academia Américana De Psiquiatría No Avalado 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1437 1437 31.0 11.0 2.0 0.0 1.0 7.0 0.0 2.0 0.0 ... 6.0 10.0 1.0 8.0 0.0 0.0 3.0 0.0 Washington State University No Avalado 0
1438 1438 141.0 18.0 14.0 14.0 24.0 19.0 0.0 0.0 0.0 ... 10.0 22.0 3.0 20.0 0.0 0.0 6.0 3.0 Yarima Guadua E. U. No Avalado 4
1439 1439 25.0 2.0 4.0 7.0 2.0 18.0 0.0 0.0 0.0 ... 1.0 3.0 0.0 2.0 0.0 0.0 2.0 0.0 Yoluka Ong, Fundación De Investigación En Biod... 1
1440 1440 0.0 0.0 0.0 0.0 0.0 0.0 11.0 0.0 0.0 ... 0.0 5.0 1.0 4.0 0.0 0.0 2.0 0.0 Zumo Tecnologia Avalado 1
1441 1441 16.0 6.0 2.0 0.0 3.0 0.0 0.0 0.0 0.0 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 Zurich S.A. No Avalado 1

1442 rows × 22 columns

In [167]:
df.head(5)
Out[167]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
0 0 4.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 ... 1.0 4.0 0.0 0.0 0.0 0.0 0.0 0.0 A.I.D Biohacking Colombia Avalado 1
1 1 10.0 1.0 3.0 1.0 2.0 4.0 0.0 1.0 0.0 ... 7.0 1.0 0.0 3.0 0.0 0.0 1.0 0.0 Aalborg University No Avalado 1
2 2 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 1.0 2.0 0.0 0.0 0.0 0.0 Abogato Jurídico S.A.S. Avalado 1
3 3 9.0 0.0 0.0 2.0 7.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 Abya-Ayala No Avalado 1
4 4 5.0 0.0 0.0 0.0 0.0 12.0 0.0 0.0 1.0 ... 1.0 10.0 1.0 16.0 0.0 0.0 1.0 0.0 Academia Américana De Psiquiatría No Avalado 1

5 rows × 22 columns

In [168]:
df.tail(5)
Out[168]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
1437 1437 31.0 11.0 2.0 0.0 1.0 7.0 0.0 2.0 0.0 ... 6.0 10.0 1.0 8.0 0.0 0.0 3.0 0.0 Washington State University No Avalado 0
1438 1438 141.0 18.0 14.0 14.0 24.0 19.0 0.0 0.0 0.0 ... 10.0 22.0 3.0 20.0 0.0 0.0 6.0 3.0 Yarima Guadua E. U. No Avalado 4
1439 1439 25.0 2.0 4.0 7.0 2.0 18.0 0.0 0.0 0.0 ... 1.0 3.0 0.0 2.0 0.0 0.0 2.0 0.0 Yoluka Ong, Fundación De Investigación En Biod... 1
1440 1440 0.0 0.0 0.0 0.0 0.0 0.0 11.0 0.0 0.0 ... 0.0 5.0 1.0 4.0 0.0 0.0 2.0 0.0 Zumo Tecnologia Avalado 1
1441 1441 16.0 6.0 2.0 0.0 3.0 0.0 0.0 0.0 0.0 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 Zurich S.A. No Avalado 1

5 rows × 22 columns

In [169]:
df.describe()
Out[169]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... integrantes doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Cluster
count 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 ... 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000 1442.000000
mean 720.500000 46.326707 9.204904 4.451858 5.737441 4.512760 14.951870 2.863380 0.487422 2.163988 ... 24.466450 3.901725 7.175475 2.096509 4.895118 0.472464 0.036789 1.284829 0.321616 1.013176
std 416.413857 72.402873 22.580558 10.017817 11.531934 9.774583 27.869376 9.718173 2.066843 6.035439 ... 22.095387 4.598790 6.990658 2.847382 6.901498 0.794647 0.166880 1.492147 0.827242 1.123452
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 360.250000 7.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 ... 9.000000 1.000000 2.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000
50% 720.500000 24.833333 2.000000 1.000000 2.000000 1.679487 7.000000 0.000000 0.000000 0.000000 ... 20.127778 2.500000 6.000000 1.000000 3.000000 0.000000 0.000000 1.000000 0.000000 1.000000
75% 1080.750000 61.000000 9.000000 5.000000 6.491667 4.898684 19.000000 2.000000 0.166667 2.000000 ... 33.903125 5.359037 10.000000 3.000000 6.000000 0.868841 0.000000 2.000000 0.317995 1.000000
max 1441.000000 933.000000 315.000000 148.000000 155.000000 103.000000 629.000000 163.000000 45.000000 80.000000 ... 158.000000 44.500000 70.000000 22.000000 84.000000 5.000000 2.000000 16.000000 12.000000 4.000000

8 rows × 21 columns

In [170]:
df.sort_values
Out[170]:
<bound method DataFrame.sort_values of       Unnamed: 0  articulos  SJR_Q_Q1  SJR_Q_Q2  SJR_Q_Q3  SJR_Q_Q4  \
0              0        4.0       0.0       0.0       1.0       1.0   
1              1       10.0       1.0       3.0       1.0       2.0   
2              2        2.0       0.0       0.0       0.0       0.0   
3              3        9.0       0.0       0.0       2.0       7.0   
4              4        5.0       0.0       0.0       0.0       0.0   
...          ...        ...       ...       ...       ...       ...   
1437        1437       31.0      11.0       2.0       0.0       1.0   
1438        1438      141.0      18.0      14.0      14.0      24.0   
1439        1439       25.0       2.0       4.0       7.0       2.0   
1440        1440        0.0       0.0       0.0       0.0       0.0   
1441        1441       16.0       6.0       2.0       0.0       3.0   

      capitulos  innovaciones  libros  softwares  ...  doctorado  maestria  \
0           0.0           0.0     0.0        0.0  ...        1.0       4.0   
1           4.0           0.0     1.0        0.0  ...        7.0       1.0   
2           0.0           0.0     0.0        0.0  ...        0.0       1.0   
3           0.0           0.0     0.0        0.0  ...        0.0       0.0   
4          12.0           0.0     0.0        1.0  ...        1.0      10.0   
...         ...           ...     ...        ...  ...        ...       ...   
1437        7.0           0.0     2.0        0.0  ...        6.0      10.0   
1438       19.0           0.0     0.0        0.0  ...       10.0      22.0   
1439       18.0           0.0     0.0        0.0  ...        1.0       3.0   
1440        0.0          11.0     0.0        0.0  ...        0.0       5.0   
1441        0.0           0.0     0.0        0.0  ...        1.0       0.0   

      Especializacion  pregrado  Investigador_Asociado  Investigador_Emerito  \
0                 0.0       0.0                    0.0                   0.0   
1                 0.0       3.0                    0.0                   0.0   
2                 1.0       2.0                    0.0                   0.0   
3                 0.0       1.0                    0.0                   0.0   
4                 1.0      16.0                    0.0                   0.0   
...               ...       ...                    ...                   ...   
1437              1.0       8.0                    0.0                   0.0   
1438              3.0      20.0                    0.0                   0.0   
1439              0.0       2.0                    0.0                   0.0   
1440              1.0       4.0                    0.0                   0.0   
1441              1.0       0.0                    0.0                   0.0   

      Investigador_Junior  Investigador Senior  \
0                     0.0                  0.0   
1                     1.0                  0.0   
2                     0.0                  0.0   
3                     0.0                  0.0   
4                     1.0                  0.0   
...                   ...                  ...   
1437                  3.0                  0.0   
1438                  6.0                  3.0   
1439                  2.0                  0.0   
1440                  2.0                  0.0   
1441                  0.0                  0.0   

                                          Instituciones  Cluster  
0                     A.I.D Biohacking Colombia Avalado        1  
1                         Aalborg University No Avalado        1  
2                       Abogato Jurídico S.A.S. Avalado        1  
3                                 Abya-Ayala No Avalado        1  
4          Academia Américana De Psiquiatría No Avalado        1  
...                                                 ...      ...  
1437             Washington State University No Avalado        0  
1438                     Yarima Guadua E. U. No Avalado        4  
1439  Yoluka Ong, Fundación De Investigación En Biod...        1  
1440                            Zumo Tecnologia Avalado        1  
1441                             Zurich S.A. No Avalado        1  

[1442 rows x 22 columns]>
In [171]:
"""### Conocer estructura del DataFrame:"""
type(df)
Out[171]:
pandas.core.frame.DataFrame
In [172]:
type(df["articulos"])
Out[172]:
pandas.core.series.Series
In [173]:
df.columns
Out[173]:
Index(['Unnamed: 0', 'articulos', 'SJR_Q_Q1', 'SJR_Q_Q2', 'SJR_Q_Q3',
       'SJR_Q_Q4', 'capitulos', 'innovaciones', 'libros', 'softwares',
       'trabajos_grado', 'integrantes', 'doctorado', 'maestria',
       'Especializacion', 'pregrado', 'Investigador_Asociado',
       'Investigador_Emerito', 'Investigador_Junior', 'Investigador Senior',
       'Instituciones', 'Cluster'],
      dtype='object')
In [174]:
# Nombres de índices.
df.index
#list(df.index)
Out[174]:
RangeIndex(start=0, stop=1442, step=1)
In [175]:
# Imprimir los tipos de datos por columna (o por variable).
print(df.dtypes)
Unnamed: 0                 int64
articulos                float64
SJR_Q_Q1                 float64
SJR_Q_Q2                 float64
SJR_Q_Q3                 float64
SJR_Q_Q4                 float64
capitulos                float64
innovaciones             float64
libros                   float64
softwares                float64
trabajos_grado           float64
integrantes              float64
doctorado                float64
maestria                 float64
Especializacion          float64
pregrado                 float64
Investigador_Asociado    float64
Investigador_Emerito     float64
Investigador_Junior      float64
Investigador Senior      float64
Instituciones             object
Cluster                    int64
dtype: object
In [176]:
# Cantidad de columnas e imprimir su valor columnas
n_col=len(df.columns)
print("Cantidad de columnas: ",n_col )
Cantidad de columnas:  22
In [177]:
# La Cantidad total de registros (filas) 
n_reg= len(df.index)
print("Cantidad de registros : ", n_reg)
Cantidad de registros :  1442
In [178]:
# Cantidad de cantidad de registros completos (sin los valores NA).
reg_com=len(df.dropna())
print("Registros completos sin NA", reg_com)
Registros completos sin NA 1442
In [179]:
# Cantidad total de registros incompletos (filas con valores NaN).
reg_incom = n_reg - reg_com
print("Cantidad de registros incompletos: ", reg_incom)
Cantidad de registros incompletos:  0
In [180]:
# Clcular la cantidad de registros completos por variable.
nr_com= df.count()
print("Cantidad de registros completos por variable:\n ",nr_com)
Cantidad de registros completos por variable:
  Unnamed: 0               1442
articulos                1442
SJR_Q_Q1                 1442
SJR_Q_Q2                 1442
SJR_Q_Q3                 1442
SJR_Q_Q4                 1442
capitulos                1442
innovaciones             1442
libros                   1442
softwares                1442
trabajos_grado           1442
integrantes              1442
doctorado                1442
maestria                 1442
Especializacion          1442
pregrado                 1442
Investigador_Asociado    1442
Investigador_Emerito     1442
Investigador_Junior      1442
Investigador Senior      1442
Instituciones            1442
Cluster                  1442
dtype: int64
In [181]:
# Cantidad de registros incompletos por variable.
print("Cantidad de registros incompletos por variable:")
print(n_reg - nr_com)
Cantidad de registros incompletos por variable:
Unnamed: 0               0
articulos                0
SJR_Q_Q1                 0
SJR_Q_Q2                 0
SJR_Q_Q3                 0
SJR_Q_Q4                 0
capitulos                0
innovaciones             0
libros                   0
softwares                0
trabajos_grado           0
integrantes              0
doctorado                0
maestria                 0
Especializacion          0
pregrado                 0
Investigador_Asociado    0
Investigador_Emerito     0
Investigador_Junior      0
Investigador Senior      0
Instituciones            0
Cluster                  0
dtype: int64
In [182]:
# Describir los valores estadísticos del DataFrame depurado.
df.describe()
df.plot()
df
Out[182]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
0 0 4.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 ... 1.0 4.0 0.0 0.0 0.0 0.0 0.0 0.0 A.I.D Biohacking Colombia Avalado 1
1 1 10.0 1.0 3.0 1.0 2.0 4.0 0.0 1.0 0.0 ... 7.0 1.0 0.0 3.0 0.0 0.0 1.0 0.0 Aalborg University No Avalado 1
2 2 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 1.0 2.0 0.0 0.0 0.0 0.0 Abogato Jurídico S.A.S. Avalado 1
3 3 9.0 0.0 0.0 2.0 7.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 Abya-Ayala No Avalado 1
4 4 5.0 0.0 0.0 0.0 0.0 12.0 0.0 0.0 1.0 ... 1.0 10.0 1.0 16.0 0.0 0.0 1.0 0.0 Academia Américana De Psiquiatría No Avalado 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1437 1437 31.0 11.0 2.0 0.0 1.0 7.0 0.0 2.0 0.0 ... 6.0 10.0 1.0 8.0 0.0 0.0 3.0 0.0 Washington State University No Avalado 0
1438 1438 141.0 18.0 14.0 14.0 24.0 19.0 0.0 0.0 0.0 ... 10.0 22.0 3.0 20.0 0.0 0.0 6.0 3.0 Yarima Guadua E. U. No Avalado 4
1439 1439 25.0 2.0 4.0 7.0 2.0 18.0 0.0 0.0 0.0 ... 1.0 3.0 0.0 2.0 0.0 0.0 2.0 0.0 Yoluka Ong, Fundación De Investigación En Biod... 1
1440 1440 0.0 0.0 0.0 0.0 0.0 0.0 11.0 0.0 0.0 ... 0.0 5.0 1.0 4.0 0.0 0.0 2.0 0.0 Zumo Tecnologia Avalado 1
1441 1441 16.0 6.0 2.0 0.0 3.0 0.0 0.0 0.0 0.0 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 Zurich S.A. No Avalado 1

1442 rows × 22 columns

In [183]:
# Hacer estadísticas del cluster

df['Cluster'].describe()
Out[183]:
count    1442.000000
mean        1.013176
std         1.123452
min         0.000000
25%         0.000000
50%         1.000000
75%         1.000000
max         4.000000
Name: Cluster, dtype: float64
In [184]:
df.columns
Out[184]:
Index(['Unnamed: 0', 'articulos', 'SJR_Q_Q1', 'SJR_Q_Q2', 'SJR_Q_Q3',
       'SJR_Q_Q4', 'capitulos', 'innovaciones', 'libros', 'softwares',
       'trabajos_grado', 'integrantes', 'doctorado', 'maestria',
       'Especializacion', 'pregrado', 'Investigador_Asociado',
       'Investigador_Emerito', 'Investigador_Junior', 'Investigador Senior',
       'Instituciones', 'Cluster'],
      dtype='object')

Generación de Nuevo Conocimiento

In [185]:
generacion = df.groupby('Cluster').sum().reset_index()[['Cluster','articulos','capitulos', 'innovaciones', 'libros', 'softwares']]
generacion[['articulos','capitulos', 'innovaciones', 'libros', 'softwares']]=generacion[['articulos','capitulos', 'innovaciones', 'libros', 'softwares']].div(generacion[['articulos','capitulos', 'innovaciones', 'libros', 'softwares']].sum(axis=1), axis=0).round(4) * 100
generacion
Out[185]:
Cluster articulos capitulos innovaciones libros softwares
0 0 70.73 22.98 2.46 0.72 3.11
1 1 64.23 24.56 7.56 0.72 2.94
2 2 63.24 23.61 5.75 0.98 6.41
3 3 83.82 15.36 0.37 0.24 0.21
4 4 68.16 19.66 6.97 0.96 4.24
In [186]:
fig = px.bar(generacion, x="Cluster", y =['articulos','capitulos', 'innovaciones', 'libros', 'softwares'], title="Generacion de nuevo conocimiento")
fig.show()

Instituciónes

In [187]:
instit = df.groupby('Cluster').sum().reset_index()[['Cluster','trabajos_grado', 'integrantes', 'doctorado', 'maestria','Especializacion', 'pregrado']]
instit[['trabajos_grado', 'integrantes', 'doctorado', 'maestria','Especializacion', 'pregrado']]=instit[['trabajos_grado', 'integrantes', 'doctorado', 'maestria','Especializacion', 'pregrado']].div(instit[['trabajos_grado', 'integrantes', 'doctorado', 'maestria','Especializacion', 'pregrado']].sum(axis=1), axis=0).round(4) * 100
instit
Out[187]:
Cluster trabajos_grado integrantes doctorado maestria Especializacion pregrado
0 0 63.76 20.65 3.99 6.24 1.31 4.04
1 1 56.93 25.23 3.31 7.46 2.32 4.75
2 2 61.32 21.18 5.64 7.64 1.19 3.03
3 3 48.57 29.39 5.70 7.72 2.41 6.20
4 4 37.16 36.90 2.88 9.31 4.86 8.88
In [188]:
fig = px.bar(instit, x="Cluster", y =['trabajos_grado', 'integrantes', 'doctorado', 'maestria','Especializacion', 'pregrado'], title="Clasificación Acádemica")
fig.show()

Clasificación de Investigadores

In [189]:
investigadores = df.groupby('Cluster').sum().reset_index()[['Cluster', 'Investigador_Asociado','Investigador_Emerito', 'Investigador_Junior', 'Investigador Senior']]
investigadores[['Investigador_Asociado','Investigador_Emerito', 'Investigador_Junior', 'Investigador Senior' ]]=investigadores[['Investigador_Asociado','Investigador_Emerito', 'Investigador_Junior', 'Investigador Senior',]].div(investigadores[['Investigador_Asociado','Investigador_Emerito', 'Investigador_Junior', 'Investigador Senior']].sum(axis=1), axis=0).round(4) * 100
investigadores
Out[189]:
Cluster Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior
0 0 23.23 2.04 62.39 12.34
1 1 14.52 0.44 72.79 12.25
2 2 34.30 2.13 40.10 23.48
3 3 20.77 3.87 49.53 25.82
4 4 12.48 1.21 72.59 13.72
In [190]:
fig = px.bar(investigadores, x="Cluster", y =['Investigador_Asociado','Investigador_Emerito', 'Investigador_Junior', 'Investigador Senior'], title="Clasificación de Investigadores")
fig.show()
In [191]:
sjr_cluster = df.groupby('Cluster').sum().reset_index()[['Cluster','SJR_Q_Q1','SJR_Q_Q2','SJR_Q_Q3','SJR_Q_Q4' ]]
sjr_cluster[['SJR_Q_Q1','SJR_Q_Q2','SJR_Q_Q3','SJR_Q_Q4' ]]=sjr_cluster[['SJR_Q_Q1','SJR_Q_Q2','SJR_Q_Q3','SJR_Q_Q4']].div(sjr_cluster[['SJR_Q_Q1','SJR_Q_Q2','SJR_Q_Q3','SJR_Q_Q4']].sum(axis=1), axis=0).round(4) * 100
sjr_cluster
Out[191]:
Cluster SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4
0 0 36.63 18.08 24.62 20.67
1 1 38.88 19.70 23.14 18.29
2 2 33.78 17.87 30.22 18.13
3 3 44.97 18.82 21.25 14.96
4 4 40.97 19.06 21.80 18.16

Scimago Ranking SJR Q

In [192]:
fig = px.bar(sjr_cluster, x="Cluster", y =['SJR_Q_Q1','SJR_Q_Q2','SJR_Q_Q3','SJR_Q_Q4'], title="Articulos Scimago Ranking SJR Q")
fig.show()
In [193]:
fig = px.bar(sjr_cluster, x="Cluster", y = ['SJR_Q_Q1'], title="Articulos Q1 en SJR",text_auto= True,labels={'x':'Clusters','y':'Articulos'},color_discrete_map={'SJR_Q_Q1':'blue'})
fig.show()
In [194]:
fig = px.bar(sjr_cluster, x="Cluster", y = ['SJR_Q_Q2'], title="Articulos Q2 en SJR",text_auto= True,labels={'x':'Clusters','y':'Articulos'},color_discrete_map={'SJR_Q_Q2':'red'})
fig.show()
In [195]:
fig = px.bar(sjr_cluster, x="Cluster", y = ['SJR_Q_Q3'], title="Articulos Q3 en SJR",text_auto= True,labels={'x':'Clusters','y':'Articulos'},color_discrete_map={'SJR_Q_Q3':'orange'})
fig.show()
In [196]:
fig = px.bar(sjr_cluster, x="Cluster", y = ['SJR_Q_Q4'], title="Articulos Q4 en SJR",text_auto= True,labels={'x':'Clusters','y':'Articulos'},color_discrete_map={'SJR_Q_Q4':'green'})
fig.show()

Participación de Clusters

In [197]:
fig = px.pie(df.groupby('Cluster').size().reset_index(), values=0, names='Cluster', title="Participación de Clusters")
fig.show()

Analisis de varianza

Cluster 0

In [198]:
df0 = df[df.Cluster == 0]
df0
Out[198]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
5 5 18.0 0.0 1.0 0.0 0.0 44.0 0.0 3.0 0.0 ... 6.0 5.0 0.0 11.0 1.0 0.0 1.0 0.0 Academia Antioqueña De Historia No Avalado 0
7 7 102.0 0.0 0.0 1.0 0.0 44.0 0.0 0.0 0.0 ... 14.0 4.0 0.0 1.0 1.0 1.0 1.0 0.0 Academia Colombiana De La Lengua No Avalado 0
8 8 18.0 0.0 1.0 4.0 1.0 52.0 2.0 3.0 0.0 ... 3.0 13.0 1.0 0.0 0.0 0.0 4.0 0.0 Academia Peruana De Cirugia No Avalado 0
11 11 188.0 7.0 2.0 24.0 53.0 4.0 0.0 0.0 22.0 ... 3.0 7.0 4.0 21.0 1.0 0.0 2.0 1.0 Administradora Country S.A- Centro De Investig... 0
22 22 239.0 5.0 2.0 8.0 16.0 22.0 0.0 0.0 2.0 ... 5.0 13.0 2.0 9.0 0.0 0.0 1.0 0.0 Alcaldía De Medellin No Avalado 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1429 1429 46.0 7.0 10.0 7.0 1.0 25.0 0.0 0.0 0.0 ... 10.0 10.0 0.0 2.0 0.0 0.0 0.0 1.0 University Of Missouri System No Avalado 0
1430 1430 325.0 67.0 20.0 39.0 54.0 67.0 23.0 1.0 2.0 ... 5.0 5.0 2.0 7.0 0.0 0.0 3.0 0.0 University Of Western Australia No Avalado 0
1431 1431 59.0 44.0 5.0 1.0 3.0 2.0 0.0 0.0 0.0 ... 12.0 0.0 0.0 0.0 1.0 0.0 6.0 0.0 Vedas Corporacion De Investigacion E Innovacio... 0
1433 1433 6.0 0.0 0.0 1.0 0.0 25.0 1.0 2.0 25.0 ... 5.0 13.0 1.0 4.0 0.0 0.0 1.0 0.0 Veritas Aid Avalado 0
1437 1437 31.0 11.0 2.0 0.0 1.0 7.0 0.0 2.0 0.0 ... 6.0 10.0 1.0 8.0 0.0 0.0 3.0 0.0 Washington State University No Avalado 0

480 rows × 22 columns

In [199]:
fig = px.box(df0, y=['articulos','capitulos','trabajos_grado'], points="all", color="Cluster")
fig.show()
In [200]:
fig = px.box(df0, y=['articulos','capitulos','trabajos_grado'], points="all", color='Instituciones')
fig.show()
In [201]:
fig = px.box(df0, y=['innovaciones', 'libros', 'softwares'], points="all", color="Cluster")
fig.show()
In [202]:
fig = px.box(df0, y=['innovaciones', 'libros', 'softwares'], points="all",color='Instituciones')
fig.show()
In [203]:
df0 = df0.sort_values(by=['articulos'], ascending=False)
df0h = df0.head()
df0h
Out[203]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
812 812 378.0 45.0 52.0 85.0 92.0 14.0 0.0 0.0 0.0 ... 0.0 3.0 1.0 1.0 0.0 0.0 0.0 0.0 Hospital Pablo Tobón Uribe Avalado 0
1142 1142 337.0 31.0 19.0 16.0 84.0 15.0 0.0 0.0 3.0 ... 2.0 3.0 2.0 28.0 0.0 0.0 1.0 1.0 Sociedad Colombiana De Cardiología No Avalado 0
1141 1141 337.0 31.0 19.0 16.0 84.0 15.0 0.0 0.0 3.0 ... 2.0 3.0 2.0 28.0 0.0 0.0 1.0 1.0 Sociedad Colombiana De Cardiologia Y Cirugia C... 0
1430 1430 325.0 67.0 20.0 39.0 54.0 67.0 23.0 1.0 2.0 ... 5.0 5.0 2.0 7.0 0.0 0.0 3.0 0.0 University Of Western Australia No Avalado 0
609 609 303.0 38.0 29.0 37.0 11.0 22.0 3.0 0.0 3.0 ... 3.0 2.0 1.0 2.0 0.0 0.0 1.0 0.0 Fundacion Tomodachi No Avalado 0

5 rows × 22 columns

In [204]:
fig = px.pie(df0, values='articulos', names='Instituciones')
fig.show()

Cluster 1

In [205]:
df1 = df[df.Cluster == 1]
df1
Out[205]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
0 0 4.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 ... 1.0 4.0 0.0 0.0 0.0 0.0 0.0 0.0 A.I.D Biohacking Colombia Avalado 1
1 1 10.0 1.0 3.0 1.0 2.0 4.0 0.0 1.0 0.0 ... 7.0 1.0 0.0 3.0 0.0 0.0 1.0 0.0 Aalborg University No Avalado 1
2 2 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 1.0 2.0 0.0 0.0 0.0 0.0 Abogato Jurídico S.A.S. Avalado 1
3 3 9.0 0.0 0.0 2.0 7.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 Abya-Ayala No Avalado 1
4 4 5.0 0.0 0.0 0.0 0.0 12.0 0.0 0.0 1.0 ... 1.0 10.0 1.0 16.0 0.0 0.0 1.0 0.0 Academia Américana De Psiquiatría No Avalado 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1435 1435 10.0 2.0 0.0 2.0 0.0 0.0 1.0 0.0 0.0 ... 1.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 Vital Care Centro De Simulación Clinica Avalado 1
1436 1436 6.0 0.0 2.0 1.0 0.0 0.0 0.0 0.0 0.0 ... 2.0 2.0 0.0 2.0 1.0 0.0 1.0 0.0 Vocology Center Avalado 1
1439 1439 25.0 2.0 4.0 7.0 2.0 18.0 0.0 0.0 0.0 ... 1.0 3.0 0.0 2.0 0.0 0.0 2.0 0.0 Yoluka Ong, Fundación De Investigación En Biod... 1
1440 1440 0.0 0.0 0.0 0.0 0.0 0.0 11.0 0.0 0.0 ... 0.0 5.0 1.0 4.0 0.0 0.0 2.0 0.0 Zumo Tecnologia Avalado 1
1441 1441 16.0 6.0 2.0 0.0 3.0 0.0 0.0 0.0 0.0 ... 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 Zurich S.A. No Avalado 1

747 rows × 22 columns

In [206]:
fig = px.box(df1, y=['articulos','capitulos','trabajos_grado'], points="all", color="Cluster")
fig.show()
In [207]:
fig = px.box(df1, y=['articulos','capitulos','trabajos_grado'], points="all", color='Instituciones')
fig.show()
In [208]:
fig = px.box(df1, y=['innovaciones', 'libros', 'softwares'], points="all", color="Cluster")
fig.show()
In [209]:
fig = px.box(df1, y=['innovaciones', 'libros', 'softwares'], points="all",color='Instituciones')
fig.show()
In [210]:
df1 = df1.sort_values(by=['articulos'], ascending=False)
df1s = df1.head()
In [211]:
fig = px.pie(df1s, values='articulos', names='Instituciones')
fig.show()

Cluster 2

In [212]:
df2 = df[df.Cluster == 2]
df2
Out[212]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
21 21 22.000000 0.000000 1.000000 2.000000 2.000000 29.000000 51.000000 0.000000 0.000000 ... 8.000000 20.000000 4.000000 1.000000 2.000000 0.000000 3.000000 0.000000 Alcaldia Municipal De Popayan No Avalado 2
93 93 262.000000 72.000000 19.000000 18.000000 12.000000 3.000000 0.000000 2.000000 12.000000 ... 15.000000 11.000000 0.000000 3.000000 2.000000 0.000000 3.000000 3.000000 Audifarma S.A. No Avalado 2
148 148 62.000000 33.000000 10.000000 7.000000 5.000000 13.000000 1.000000 0.000000 11.000000 ... 29.000000 25.000000 2.000000 7.000000 2.000000 0.000000 11.000000 4.000000 Centro De Bioinformatica Y Biologia Computacio... 2
163 163 171.000000 2.000000 2.000000 3.000000 2.000000 68.000000 0.000000 0.000000 0.000000 ... 15.000000 12.000000 1.000000 8.000000 4.000000 2.000000 2.000000 3.000000 Centro De Estudios Y Asesorías En Ciencias Soc... 2
185 185 50.500000 18.500000 6.000000 5.000000 4.500000 11.000000 2.500000 1.000000 13.000000 ... 12.500000 24.500000 5.000000 20.500000 4.500000 0.000000 3.500000 1.000000 Centro De Investigación Y Desarrollo Tecnológi... 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1342 1342 124.000000 28.000000 12.000000 23.000000 8.000000 26.000000 0.000000 0.000000 0.000000 ... 21.000000 21.000000 0.000000 11.000000 2.000000 0.000000 6.000000 5.000000 Universidad Eia No Avalado 2
1404 1404 67.000000 8.000000 3.333333 8.333333 3.666667 44.333333 1.000000 1.333333 0.666667 ... 14.666667 16.000000 4.000000 13.000000 1.666667 0.666667 2.666667 0.666667 Universidad Santo Tomás, Seccional Bucaramanga... 2
1407 1407 92.136364 15.363636 7.545455 19.636364 16.272727 49.659091 9.431818 1.136364 4.045455 ... 9.136364 12.340909 2.386364 6.318182 2.659091 0.181818 2.750000 1.636364 Universidad Simón Bolívar Avalado 2
1415 1415 101.500000 17.500000 9.500000 32.500000 20.500000 37.000000 18.500000 0.500000 9.000000 ... 17.500000 16.000000 1.500000 4.000000 3.500000 0.500000 4.500000 8.500000 Universidad Tecnológica De Bolívar No Avalado 2
1418 1418 132.000000 12.000000 3.000000 18.000000 13.000000 11.000000 0.000000 2.000000 0.000000 ... 13.000000 10.000000 0.000000 0.000000 5.000000 0.000000 2.000000 0.000000 Universidade Estadual Paulista Júlio De Mesqui... 2

67 rows × 22 columns

In [213]:
fig = px.box(df2, y=['articulos','capitulos','trabajos_grado'], points="all", color="Cluster")
fig.show()
In [214]:
fig = px.box(df2, y=['articulos','capitulos','trabajos_grado'], points="all", color='Instituciones')
fig.show()
In [215]:
fig = px.box(df2, y=['innovaciones', 'libros', 'softwares'], points="all", color="Cluster")
fig.show()
In [216]:
fig = px.box(df2, y=['innovaciones', 'libros', 'softwares'], points="all",color='Instituciones')
fig.show()
In [217]:
df2 = df2.sort_values(by=['articulos'], ascending=False)
df2.head()
Out[217]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
93 93 262.0 72.0 19.0 18.0 12.0 3.0 0.0 2.0 12.0 ... 15.0 11.0 0.0 3.0 2.0 0.0 3.0 3.0 Audifarma S.A. No Avalado 2
456 456 214.0 33.0 47.0 45.0 32.0 12.0 0.0 0.0 34.0 ... 14.0 12.0 8.0 5.0 2.0 0.0 2.0 3.0 E Energia Eficiente S.A. E.S.P. Avalado 2
429 429 174.0 54.0 8.0 17.0 40.0 29.0 1.0 1.0 2.0 ... 10.0 43.0 5.0 19.0 2.0 0.0 2.0 4.0 Corporación Vidarium, Centro De Investigación ... 2
163 163 171.0 2.0 2.0 3.0 2.0 68.0 0.0 0.0 0.0 ... 15.0 12.0 1.0 8.0 4.0 2.0 2.0 3.0 Centro De Estudios Y Asesorías En Ciencias Soc... 2
781 781 170.5 25.0 8.0 35.0 17.5 6.5 4.0 0.0 0.5 ... 17.5 18.0 0.5 12.0 2.0 0.0 5.0 1.5 Grupo Labservis Ltda No Avalado 2

5 rows × 22 columns

In [218]:
df2 = df2.sort_values(by=['articulos'], ascending=False)
df2s = df2.head()
fig = px.pie(df2s, values='articulos', names='Instituciones')
fig.show()

Cluster 3

In [219]:
df3 = df[df.Cluster == 3]
df3
Out[219]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
84 84 480.000000 70.000000 43.000000 63.000000 75.0 134.000000 1.0 1.000000 1.000000 ... 27.000000 16.0 1.0 13.000000 3.0 1.0 2.000000 5.000000 Asociación Hortifrutícola De Colombia - Asohof... 3
197 197 933.000000 315.000000 148.000000 155.000000 103.0 53.000000 2.0 2.000000 2.000000 ... 10.000000 31.0 22.0 19.000000 3.0 0.0 5.000000 1.000000 Centro Internacional Para Prueba De Vacunas Y ... 3
332 332 455.000000 229.000000 44.000000 68.000000 7.0 11.000000 0.0 0.000000 2.000000 ... 27.000000 29.0 4.0 62.000000 1.0 1.0 6.000000 1.000000 Corporación Centro Internacional De Entrenamie... 3
333 333 933.000000 315.000000 148.000000 155.000000 103.0 53.000000 2.0 2.000000 2.000000 ... 10.000000 31.0 22.0 19.000000 3.0 0.0 5.000000 1.000000 Corporación Centro Internacional De Entrenamie... 3
345 345 509.000000 180.000000 77.000000 69.000000 40.0 22.000000 6.0 2.000000 0.000000 ... 27.000000 28.0 4.0 33.000000 1.0 1.0 5.000000 4.000000 Corporación De Innovación Para El Desarrollo D... 3
452 452 413.000000 86.000000 30.000000 74.000000 23.0 52.000000 11.0 1.000000 1.000000 ... 25.000000 26.0 7.0 22.000000 2.0 1.0 6.000000 7.000000 Dirección Seccional De Salud De Antioquia No A... 3
633 633 507.666667 150.666667 89.666667 94.666667 66.0 31.333333 1.0 0.666667 1.333333 ... 4.333333 16.0 8.0 9.666667 1.0 0.0 2.333333 0.666667 Fundacion Valle Del Lili Avalado 3
686 686 374.500000 200.000000 53.000000 13.000000 5.5 14.000000 3.0 0.000000 0.000000 ... 44.500000 34.0 5.0 29.000000 2.5 0.5 16.000000 4.000000 Fundación Instituto De Inmunología De Colombia... 3
896 896 392.000000 64.000000 32.000000 69.000000 99.0 69.000000 1.0 6.000000 4.000000 ... 18.000000 24.0 6.0 5.000000 5.0 0.0 1.000000 6.000000 Instituto Colombiano De Neuropedagogia No Avalado 3
916 916 442.000000 189.000000 58.000000 53.000000 27.0 629.000000 0.0 2.000000 1.000000 ... 30.000000 70.0 8.0 39.000000 2.0 0.0 15.000000 1.000000 Instituto De Investigación De Recursos Biológi... 3
1143 1143 480.000000 70.000000 43.000000 63.000000 75.0 134.000000 1.0 1.000000 1.000000 ... 27.000000 16.0 1.0 13.000000 3.0 1.0 2.000000 5.000000 Sociedad Colombiana De Ciencias Hortícolas No ... 3
1352 1352 933.000000 315.000000 148.000000 155.000000 103.0 53.000000 2.0 2.000000 2.000000 ... 10.000000 31.0 22.0 19.000000 3.0 0.0 5.000000 1.000000 Universidad Icesi No Avalado 3

12 rows × 22 columns

In [220]:
fig = px.box(df3, y=['articulos','capitulos','innovaciones','trabajos_grado'], points="all", color="Cluster")
fig.show()
In [221]:
fig = px.box(df3, y=['articulos','capitulos','innovaciones','trabajos_grado'], points="all", color='Instituciones')
fig.show()
In [222]:
fig = px.box(df3, y=['libros', 'softwares'], points="all", color="Cluster")
fig.show()
In [223]:
fig = px.box(df3, y=['libros', 'softwares'], points="all",color='Instituciones')
fig.show()
In [224]:
df3 = df3.sort_values(by=['articulos'], ascending=False)
df3.head()
Out[224]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
197 197 933.000000 315.000000 148.000000 155.000000 103.0 53.000000 2.0 2.000000 2.000000 ... 10.000000 31.0 22.0 19.000000 3.0 0.0 5.000000 1.000000 Centro Internacional Para Prueba De Vacunas Y ... 3
333 333 933.000000 315.000000 148.000000 155.000000 103.0 53.000000 2.0 2.000000 2.000000 ... 10.000000 31.0 22.0 19.000000 3.0 0.0 5.000000 1.000000 Corporación Centro Internacional De Entrenamie... 3
1352 1352 933.000000 315.000000 148.000000 155.000000 103.0 53.000000 2.0 2.000000 2.000000 ... 10.000000 31.0 22.0 19.000000 3.0 0.0 5.000000 1.000000 Universidad Icesi No Avalado 3
345 345 509.000000 180.000000 77.000000 69.000000 40.0 22.000000 6.0 2.000000 0.000000 ... 27.000000 28.0 4.0 33.000000 1.0 1.0 5.000000 4.000000 Corporación De Innovación Para El Desarrollo D... 3
633 633 507.666667 150.666667 89.666667 94.666667 66.0 31.333333 1.0 0.666667 1.333333 ... 4.333333 16.0 8.0 9.666667 1.0 0.0 2.333333 0.666667 Fundacion Valle Del Lili Avalado 3

5 rows × 22 columns

In [225]:
df3 = df3.sort_values(by=['articulos'], ascending=False)
df3s = df3.head()
fig = px.pie(df3s, values='articulos', names='Instituciones')
fig.show()

Cluster 4

In [226]:
df4 = df[df.Cluster == 4]
df4
Out[226]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
15 15 4.0 1.0 0.0 0.0 0.0 16.0 2.0 0.0 0.0 ... 8.0 20.0 16.0 13.0 1.0 0.0 4.0 0.0 Agencia Para La Reincorporación Y La Normaliza... 4
16 16 15.0 0.0 0.0 1.0 1.0 38.0 0.0 0.0 3.0 ... 1.0 21.0 11.0 6.0 0.0 0.0 0.0 0.0 Agraf Industrial S.A. No Avalado 4
17 17 235.0 55.0 32.0 18.0 17.0 50.0 1.0 0.0 4.0 ... 1.0 9.0 6.0 52.0 0.0 0.0 2.0 1.0 Agremiacion De Gineco-Obstetras De Cartagena Y... 4
32 32 88.0 59.0 7.0 5.0 2.0 3.5 0.5 0.0 0.0 ... 6.0 17.0 4.5 14.0 1.0 0.0 4.0 1.0 Alzak Foundation Avalado 4
37 37 4.0 0.0 0.0 0.0 0.0 3.0 2.0 0.0 0.0 ... 1.0 11.0 9.0 12.0 0.0 0.0 2.0 0.0 Andina Diseñadores No Avalado 4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1355 1355 132.0 44.0 28.0 6.0 10.0 7.0 0.0 0.0 0.0 ... 14.0 17.0 2.0 25.0 0.0 0.0 2.0 0.0 Universidad Interamericana De Puerto Rico, San... 4
1369 1369 64.0 5.0 5.0 7.0 22.0 11.0 0.0 0.0 0.0 ... 2.0 16.0 6.0 12.0 0.0 0.0 3.0 2.0 Universidad Metropolitana De Ciencias De La Ed... 4
1399 1399 78.3 2.8 1.6 3.1 2.6 26.9 4.3 1.2 3.8 ... 8.9 21.0 5.1 9.6 0.7 0.0 2.6 0.1 Universidad Santo Tomas Seccional Tunja Avalado 4
1400 1400 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 14.0 21.0 3.0 9.0 0.0 1.0 2.0 0.0 Universidad Santo Tomas Seccional Tunja No Ava... 4
1438 1438 141.0 18.0 14.0 14.0 24.0 19.0 0.0 0.0 0.0 ... 10.0 22.0 3.0 20.0 0.0 0.0 6.0 3.0 Yarima Guadua E. U. No Avalado 4

136 rows × 22 columns

In [227]:
fig = px.box(df4, y=['articulos','capitulos','innovaciones','trabajos_grado'], points="all", color="Cluster")
fig.show()
In [228]:
fig = px.box(df4, y=['articulos','capitulos','innovaciones','trabajos_grado'], points="all", color='Instituciones')
fig.show()
In [229]:
fig = px.box(df4, y=['libros', 'softwares'], points="all", color="Cluster")
fig.show()
In [230]:
fig = px.box(df4, y=['libros', 'softwares'], points="all",color='Instituciones')
fig.show()
In [231]:
df4 = df4.sort_values(by=['articulos'], ascending=False)
df4.head()
Out[231]:
Unnamed: 0 articulos SJR_Q_Q1 SJR_Q_Q2 SJR_Q_Q3 SJR_Q_Q4 capitulos innovaciones libros softwares ... doctorado maestria Especializacion pregrado Investigador_Asociado Investigador_Emerito Investigador_Junior Investigador Senior Instituciones Cluster
669 669 235.0 55.0 32.0 18.0 17.0 50.0 1.0 0.0 4.0 ... 1.0 9.0 6.0 52.0 0.0 0.0 2.0 1.0 Fundación Grupo De Investigación En Cuidados I... 4
17 17 235.0 55.0 32.0 18.0 17.0 50.0 1.0 0.0 4.0 ... 1.0 9.0 6.0 52.0 0.0 0.0 2.0 1.0 Agremiacion De Gineco-Obstetras De Cartagena Y... 4
763 763 235.0 55.0 32.0 18.0 17.0 50.0 1.0 0.0 4.0 ... 1.0 9.0 6.0 52.0 0.0 0.0 2.0 1.0 Gestion Salud Avalado 4
391 391 235.0 55.0 32.0 18.0 17.0 50.0 1.0 0.0 4.0 ... 1.0 9.0 6.0 52.0 0.0 0.0 2.0 1.0 Corporación Sociedad Colombiana De Pedagogia -... 4
230 230 206.0 26.0 6.0 28.0 15.0 23.0 0.0 0.0 0.0 ... 0.0 7.0 11.0 2.0 0.0 0.0 2.0 1.0 Clínica Del Prado No Avalado 4

5 rows × 22 columns

In [232]:
df4 = df4.sort_values(by=['articulos'], ascending=False)
df4s = df4.head()
fig = px.pie(df4s, values='articulos', names='Instituciones')
fig.show()

.

.

Borrador, INCOMPLETO HACIA ABAJO

.

Correlación

In [233]:
corr = df.corr()
mask = np.zeros_like(corr)
mask[np.triu_indices_from(mask)] = True
with sns.axes_style("white"):
    f, ax = plt.subplots(figsize=(15, 15))
    ax = sns.heatmap(corr,mask=mask,square=True,linewidths=.8,cmap="YlGnBu")
In [234]:
"""### Presentacion de los datos

"""
a = df['articulos']
Q1 = df['SJR_Q_Q1']
Q2 = df['SJR_Q_Q2']
Q3 = df['SJR_Q_Q3']
Q4 = df['SJR_Q_Q4']
In = df['Instituciones']
C = df['Cluster']
P = df['pregrado']
i = df['integrantes']
Ij = df['Investigador_Junior']
In [235]:
type(C)
Out[235]:
pandas.core.series.Series
In [236]:
from IPython.core.interactiveshell import SpaceInInput
In [237]:
# Grafico de conjunto de datos de todas las variables

fig = plt.figure(figsize=(10,8), dpi=150)

ax1 = plt.subplot(3,3,1)
ax1.plot(C, Q1, lw='0.75', color='r')
ax1.set_title('Cluster vs Q1')
ax1.set_xlabel('Cluster')
ax1.set_ylabel('Q1')

ax2 = plt.subplot(3,3,2)
ax2.plot(C, Q2, lw='0.75', color='g')
ax2.set_title('Cluster vs Q2')
ax2.set_xlabel('Cluster')
ax2.set_ylabel('Q2')

ax3= plt.subplot(3,3,3)
ax3.plot(C, Q3, lw='0.75', color='b')
ax3.set_title('Cluster vs Q3')
ax3.set_xlabel('Cluster')
ax3.set_ylabel('Q3')

ax4 = plt.subplot(3,3,4)
ax4.plot(C, Q4, lw='0.75', color='c')
ax4.set_title('Cluster vs Q4')
ax4.set_xlabel('Cluster')
ax4.set_ylabel('Q4')

#ax5 = plt.subplot(3,3,5)
#ax5.plot(a, In, lw='0.75', color='m')
#ax5.set_title('Instituciones con mas articulos')
#ax5.set_xlabel('Articulos')
#ax5.set_ylabel('Instituciones')

# ax6 = plt.subplot(3,3,6)
# ax6.plot(i, Ij, lw='1.50', color=[0.2,0.25,1.0])
# ax6.set_title('Investigadores Jr entre integrantes')
# ax6.set_xlabel('Integrantes')
# ax6.set_ylabel('Investigadores Jr')


# Prevenir la superposicion de los titulos de subplots con los ejes contiguos 

plt.tight_layout()



fig.subplots_adjust(top=0.90)


plt.show()
In [238]:
#%%shell
#jupyter nbconvert --to html /EDA_Clusters_3BIO.ipynb